# remove(list = ls())
### loading packages
library(haven)
library(reactable)
library(sf)
library(data.table)
library(saeTrafo)
library(emdi)
library(tidyverse)In this script the SAE models FH and BHF will be calculated.
The Variable selection resulted that we will use this Formula, for further specification of the Models.
aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + v04_revoq_Missing + car_yes + water_heater_yes + kitchen_yes
### auxillary data needed to calculate FH model
dat_census_aux <- readRDS("../data_raw/2024_census/processed/census_aux_data_aggregated.RDS")
#### load processed dummy coded census data
dat <- readRDS("../data_raw/2024_census/processed/dat_dummyCoding.RDS")
pop_table <- dat %>% group_by(ID_prov) %>% count()
#### load sample data
### 172 much better for FH
# sample <- readRDS("../data_raw/samples/transformed-2/sample_172.RDS")
sample <- readRDS("../data_raw/samples/sample_for_model_building_transformed.RDS")
sample %>% class()## [1] "data.table" "data.frame"
sample <- sample %>% as.data.frame()
# FH full nutzt aggregated/shares aus dat_census_aux
fh_formula <- Mean ~ mean_p26_edad + share_ocu_military + share_ocu_manager +
share_ocu_professional + share_ocu_technician + share_ocu_adminSupport +
share_ocu_serviceSales + share_ocu_agriculture + share_ocu_unskilled +
share_sex_male + share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
share_interior_plastered_yes + share_v04_revoq_Missing + share_car_yes + share_water_heater_yes +
share_kitchen_yes
# BHF nutzt individuelle Dummyvariablen im Sample/Census
bhf_formula <- aestudio ~ p26_edad + ocu_military + ocu_manager +
ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales +
ocu_agriculture + ocu_unskilled + sex_male + read_knowing +
urbrur_urban + health_insurance_yes + interior_plastered_yes +
v04_revoq_Missing + car_yes + water_heater_yes + kitchen_yes
sample$aestudio %>% hist()## .
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 173 46 75 131 106 178 96 81 130 63 69 67 568 62 89 109 25 164 2 22
## 20 22 23
## 1 1 2
We have a simple random sample, which makes calculating the sample variance much easier, and we dont need second order inclusion criterion, nor do we need to use bootsrap or jacknife, to calculate the direct estimate and the domain specific variance.
This is the HT estimator for the totls in a domain:\(\hat{Y}_d = \sum_{i \in s_d} \frac{y_i}{\pi_i}\)
This then is the resultig design-based variance: \[ \text{Var}_\pi(\hat{Y}_d) = \left(1 - \frac{n_d}{N_d}\right) \frac{N_d^2 S_d^2}{n_d} \]
Now we are not interest in the totals, but in the variance of the mean, which results in this formula: \[ \text{Var}_\pi(\bar{y}_d) = \frac{1}{N_d^2} \text{Var}_\pi(\hat{Y}_d) = \left(1 - \frac{n_d}{N_d}\right) \frac{S_d^2}{n_d} \]
# direct_estimates <- sample %>%
# group_by(ID_prov) %>%
# summarise(
# Mean = mean(aestudio, na.rm = TRUE),
# n_eff = sum(!is.na(aestudio)),
# Var_Mean = var(aestudio, na.rm = T)/n_eff, #### we still have t inlcude FPC
# .groups = "drop"
# )
sample <- sample %>% left_join(pop_table,by = "ID_prov")
direct_estimates <- sample %>%
group_by(ID_prov) %>%
reframe(
Mean = mean(aestudio, na.rm = TRUE),
n_eff = sum(!is.na(aestudio)),
n = mean(n,na.rm = T),
Var_Mean = (1 - (n_eff/n)) * var(aestudio, na.rm = T)/n_eff
)
### recreate direct object
direct_rec <- list(
"ind" = data.frame("Domain" = direct_estimates$ID_prov,
"Mean" = direct_estimates$Mean),
"MSE" = data.frame("Domain" = direct_estimates$ID_prov,
"Mean" = direct_estimates$Var_Mean)
)
class(direct_rec) <- c("direct", "emdi")
# direct_rec %>% class()
# direct %>% class()# direct_n50 <- emdi::direct(y = "aestudio", smp_data = sample ,smp_domains = "ID_prov",var = T,na.rm = T,B = 50)
# direct_n500 <- emdi::direct(y = "aestudio", smp_data = sample ,smp_domains = "ID_prov",var = T,na.rm = T,B = 500)
# saveRDS(direct_n50,"../data_raw/simulation/processed/direct_50_bootstraps.RDS")
# saveRDS(direct_n500,"../data_raw/simulation/processed/direct_500_bootstraps.RDS")
direct_n500 <- readRDS("../data_raw/simulation/processed/direct_500_bootstraps.RDS")
direct_n50 <- readRDS("../data_raw/simulation/processed/direct_50_bootstraps.RDS")
tmp_dat <- merge(direct_n50$MSE[,c("Domain","Mean")],direct_estimates[,c("ID_prov","Var_Mean")],by.x = "Domain", by.y = "ID_prov")
plot(tmp_dat$Mean,tmp_dat$Var_Mean)tmp_dat <- merge(direct_n500$MSE[,c("Domain","Mean")],direct_estimates[,c("ID_prov","Var_Mean")],by.x = "Domain", by.y = "ID_prov")
plot(tmp_dat$Mean,tmp_dat$Var_Mean)fh_model_null <- fh(
fixed = Mean ~ 1,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",
MSE = TRUE
)
# fh_model_null %>% summary()
fh_model_null$MSE$FH %>% mean()## [1] 0.7744406
### ml, becasue step funcion only works with ml
fh_model_full_step <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "ml",
MSE = TRUE
)## Warning in fh(fixed = fh_formula, vardir = "Var_Mean", combined_data =
## combined_data, : The estimate of the variance of the random effects falls at
## the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Call:
## fh(fixed = fh_formula, vardir = "Var_Mean", combined_data = combined_data,
## domains = "ID_prov", method = "ml", MSE = TRUE)
##
## Out-of-sample domains: 0
## In-sample domains: 113
##
## Variance and MSE estimation:
## Variance estimation method: ml
## Estimated variance component(s): 7.807881e-05
## MSE method: datta-lahiri
##
## Coefficients:
## coefficients std.error t.value p.value
## (Intercept) 8.263570 5.952785 1.3882 0.165081
## mean_p26_edad -0.212057 0.075946 -2.7922 0.005235 **
## share_ocu_military 72.486824 71.213423 1.0179 0.308734
## share_ocu_manager 14.312424 115.050334 0.1244 0.900997
## share_ocu_professional 27.912394 9.150236 3.0505 0.002285 **
## share_ocu_technician 64.066376 38.510533 1.6636 0.096191 .
## share_ocu_adminSupport -113.973459 64.813123 -1.7585 0.078664 .
## share_ocu_serviceSales 11.518898 7.481164 1.5397 0.123629
## share_ocu_agriculture 2.531023 2.529610 1.0006 0.317040
## share_ocu_unskilled 5.340755 13.847879 0.3857 0.699739
## share_sex_male -10.285093 6.494636 -1.5836 0.113278
## share_read_knowing 14.462073 3.204910 4.5125 6.408e-06 ***
## share_urbrur_urban -1.483130 1.137654 -1.3037 0.192345
## share_health_insurance_yes 7.966022 2.720474 2.9282 0.003410 **
## share_interior_plastered_yes 0.215901 1.028335 0.2100 0.833705
## share_v04_revoq_Missing -8.088745 5.466903 -1.4796 0.138984
## share_car_yes -4.665394 1.854700 -2.5154 0.011888 *
## share_water_heater_yes 3.384140 4.281597 0.7904 0.429299
## share_kitchen_yes -2.567085 1.619506 -1.5851 0.112943
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Explanatory measures:
## loglike AIC BIC KIC AdjR2 FH_R2
## 1 -159.245 358.49 413.0378 378.49 0.657933 0.903335
##
## Residual diagnostics:
## Skewness Kurtosis Shapiro_W Shapiro_p
## Standardized_Residuals -0.3671767 3.643396 0.9834297 0.17651817
## Random_effects -0.5064880 4.593973 0.9684738 0.00901226
##
## Transformation: No transformation
fh_model_full_unoptimized <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",
MSE = TRUE
)
fh_model_full_unoptimized$MSE$FH %>% mean()## [1] 0.3002415
## Empirical Best Linear Unbiased Prediction (Fay-Herriot)
##
## Out-of-sample domains: 0
## In-sample domains: 113
##
## Variance and MSE estimation:
## Variance estimation method: reml
## Variance of random effects: 0.1191395
## MSE method: prasad-rao
##
## Transformation: No transformation
### model selection
# emdi::step(fh_model_full_step,direction ="both",trace = 0) ### trace null doesn work
fh_model_step <- suppressWarnings(
suppressMessages(
emdi::step(fh_model_full_step, direction = "both", trace = FALSE)
)
)
coefficients(fh_model_step)## (Intercept) mean_p26_edad
## 9.453181 -0.199955
## share_ocu_professional share_ocu_technician
## 29.466319 65.682717
## share_ocu_adminSupport share_ocu_serviceSales
## -120.453130 9.465757
## share_sex_male share_read_knowing
## -10.734112 14.674394
## share_urbrur_urban share_health_insurance_yes
## -1.665386 6.771021
## share_v04_revoq_Missing share_car_yes
## -6.953541 -4.090169
## share_kitchen_yes
## -2.494927
fh_formula_optimized <- Mean ~ mean_p26_edad + share_ocu_professional + share_ocu_technician +
share_ocu_adminSupport + share_ocu_serviceSales + share_sex_male +
share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
share_v04_revoq_Missing + share_car_yes + share_kitchen_yes
### which is the optimized one
fh_model_full <- fh(fixed = fh_formula_optimized,
vardir = "Var_Mean", combined_data = combined_data, domains = "ID_prov",
method = "reml", MSE = TRUE)
fh_model_full$MSE$FH %>% mean()## [1] 0.225684
## Empirical Best Linear Unbiased Prediction (Fay-Herriot)
##
## Out-of-sample domains: 0
## In-sample domains: 113
##
## Variance and MSE estimation:
## Variance estimation method: reml
## Variance of random effects: 0.08655872
## MSE method: prasad-rao
##
## Transformation: No transformation
## Call:
## fh(fixed = fh_formula_optimized, vardir = "Var_Mean", combined_data = combined_data,
## domains = "ID_prov", method = "reml", MSE = TRUE)
##
## Out-of-sample domains: 0
## In-sample domains: 113
##
## Variance and MSE estimation:
## Variance estimation method: reml
## Estimated variance component(s): 0.08655872
## MSE method: prasad-rao
##
## Coefficients:
## coefficients std.error t.value p.value
## (Intercept) 9.317664 5.921306 1.5736 0.1155840
## mean_p26_edad -0.197652 0.061205 -3.2293 0.0012408 **
## share_ocu_professional 29.144729 7.989834 3.6477 0.0002646 ***
## share_ocu_technician 63.984394 36.274784 1.7639 0.0777521 .
## share_ocu_adminSupport -116.601984 64.155966 -1.8175 0.0691441 .
## share_ocu_serviceSales 9.484141 6.113020 1.5515 0.1207901
## share_sex_male -10.776340 6.241717 -1.7265 0.0842571 .
## share_read_knowing 14.726894 3.109621 4.7359 2.181e-06 ***
## share_urbrur_urban -1.693564 0.983566 -1.7219 0.0850948 .
## share_health_insurance_yes 6.743896 2.374934 2.8396 0.0045168 **
## share_v04_revoq_Missing -6.789006 5.062317 -1.3411 0.1798923
## share_car_yes -4.137770 1.475930 -2.8035 0.0050551 **
## share_kitchen_yes -2.439641 1.608233 -1.5170 0.1292743
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Explanatory measures:
## loglike AIC BIC KIC AdjR2 FH_R2
## 1 -160.8734 349.7468 387.9302 363.7468 0.6732735 0.9154796
##
## Residual diagnostics:
## Skewness Kurtosis Shapiro_W Shapiro_p
## Standardized_Residuals -0.3182545 3.638049 0.9862435 0.30392975
## Random_effects -0.4464279 4.597402 0.9759183 0.03890229
##
## Transformation: No transformation
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## ℹ The deprecated feature was likely used in the emdi package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the emdi package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## $qq_plots
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
##
## $density_res
##
## $density_ran
##
## $cooks_distance
## [1] NA
##
## $likelihood
## [1] NA
## $scatter_FH
##
## $line_FH
##
## $scatter_FH
## NULL
##
## $line_FH
## NULL
##
## $scatter_FH
## NULL
##
## $line_FH
## NULL
##
## $boxplot_MSE_FH
##
## $ordered_MSE_FH
##
## $boxplot_CV_FH
##
## $ordered_CV_FH
fh_model_full_log <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",transformation = "log",backtransformation = "bc_crude",
MSE = TRUE
)
plot(fh_model_full_log)## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## [1] 0.2094632
## [1] 7684281
## [1] 7350282
## [1] 7350282
dat_BHF_census$ID_prov <- dat_BHF_census$ID_prov %>% as.factor()
# sapply(dat_BHF_census,FUN = function(x) sum(is.na(x)))
nrow(sample)## [1] 2260
## [1] 2260
## [1] 2260
dat_BHF_sample$ID_prov <- dat_BHF_sample$ID_prov %>% as.factor()
sapply(dat_BHF_sample,FUN = function(x) sum(is.na(x)))## ID_prov aestudio
## 0 0
## p26_edad ocu_military
## 0 0
## ocu_manager ocu_professional
## 0 0
## ocu_technician ocu_adminSupport
## 0 0
## ocu_serviceSales ocu_agriculture
## 0 0
## ocu_construction ocu_operators
## 0 0
## ocu_unskilled ocu_NaN
## 0 0
## ocu_1d_13_Missing sex_female
## 0 0
## sex_male read_knowing
## 0 0
## read_not_knowing read_not_specified
## 0 0
## urbrur_urban urbrur_rural
## 0 0
## health_insurance_yes health_insurance_no
## 0 0
## health_insurance_not_specified p30b_caja_Missing
## 0 0
## interior_plastered_yes interior_plastered_no
## 0 0
## v04_revoq_Missing car_yes
## 0 0
## car_no car_not_specified
## 0 0
## v18c_auto_Missing water_heater_yes
## 0 0
## water_heater_no water_heater_not_specified
## 0 0
## v18h_calefon_Missing kitchen_yes
## 0 0
## kitchen_no v12_cocina_Missing
## 0 0
## n
## 0
BHF_model_full_unoptimized <- NER_Trafo(fixed = bhf_formula
,smp_domains = "ID_prov"
, smp_data = dat_BHF_sample
, pop_data = dat_BHF_census
, pop_area_size = vec_pop_size
, pop_domains = "ID_prov"
, transformation = "no"
, seed = 2022
, MSE = T
)## [1] "More SAE methods for full population data and transformations are offered in the R package emdi."
## [1] 0.2790526
### model selection
full_model <- lm(bhf_formula,data = sample)
bhf_formula_intercept <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional +
ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture +
ocu_unskilled + sex_male + read_knowing + urbrur_urban +
health_insurance_yes + interior_plastered_yes + v04_revoq_Missing +
car_yes + water_heater_yes + kitchen_yes + (1|ID_prov)
lmer_model <- lmer(bhf_formula_intercept,data = sample)
# lmer_model %>% summary()
step(lmer_model,trace = 0)## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 21 -6068.9 12180
## (1 | ID_prov) 0 20 -6079.0 12198 20.159 1 7.125e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF DenDF F value
## v04_revoq_Missing 1 33.3 33.3 1 2240.7 2.7201
## p26_edad 0 5541.1 5541.1 1 2241.3 451.6769
## ocu_military 0 83.8 83.8 1 2222.2 6.8312
## ocu_manager 0 104.2 104.2 1 2239.5 8.4936
## ocu_professional 0 3637.3 3637.3 1 2231.0 296.4944
## ocu_technician 0 521.3 521.3 1 2217.6 42.4959
## ocu_adminSupport 0 178.7 178.7 1 2224.9 14.5638
## ocu_serviceSales 0 59.7 59.7 1 2231.4 4.8659
## ocu_agriculture 0 514.7 514.7 1 2236.7 41.9543
## ocu_unskilled 0 77.3 77.3 1 2221.1 6.3046
## sex_male 0 193.5 193.5 1 2218.0 15.7743
## read_knowing 0 4182.1 4182.1 1 2235.8 340.9007
## urbrur_urban 0 127.0 127.0 1 1222.8 10.3512
## health_insurance_yes 0 315.4 315.4 1 2242.0 25.7082
## interior_plastered_yes 0 311.8 311.8 1 2107.4 25.4185
## car_yes 0 97.7 97.7 1 2239.7 7.9621
## water_heater_yes 0 109.8 109.8 1 2237.5 8.9509
## kitchen_yes 0 114.0 114.0 1 2241.8 9.2940
## Pr(>F)
## v04_revoq_Missing 0.0992343 .
## p26_edad < 2.2e-16 ***
## ocu_military 0.0090184 **
## ocu_manager 0.0035994 **
## ocu_professional < 2.2e-16 ***
## ocu_technician 8.742e-11 ***
## ocu_adminSupport 0.0001392 ***
## ocu_serviceSales 0.0274934 *
## ocu_agriculture 1.145e-10 ***
## ocu_unskilled 0.0121132 *
## sex_male 7.364e-05 ***
## read_knowing < 2.2e-16 ***
## urbrur_urban 0.0013279 **
## health_insurance_yes 4.297e-07 ***
## interior_plastered_yes 5.009e-07 ***
## car_yes 0.0048189 **
## water_heater_yes 0.0028037 **
## kitchen_yes 0.0023259 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes + (1 | ID_prov)
Suggested model by step funktion bhf_formula_optimized <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes + (1 | ID_prov)
bhf_formula_optimized <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes
BHF_model_full <- NER_Trafo(fixed = bhf_formula_optimized
,smp_domains = "ID_prov"
, smp_data = dat_BHF_sample
, pop_data = dat_BHF_census
, pop_area_size = vec_pop_size
, pop_domains = "ID_prov"
, transformation = "no"
, seed = 2022
, MSE = T
)## [1] "More SAE methods for full population data and transformations are offered in the R package emdi."
## [1] 0.2792847
## Nested error regression model (under transformations)
##
## Out-of-sample domains: 0
## In-sample domains: 113
##
## Transformation: No transformation
##
## Model fit:
## For model fit lme methods are applicable to saeTrafoObject$model
## where transformed_data equals smp_data transformed by function
## data_transformation using above given transformation and lambda
## and where fixed/list(fixed) equals aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional +
## ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture +
## ocu_unskilled + sex_male + read_knowing + urbrur_urban +
## health_insurance_yes + interior_plastered_yes + car_yes +
## water_heater_yes + kitchen_yes
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
p1 <- p_dig_FH$density_res
p2 <- p_dig_FH$density_res +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#FF8552",fill = "#FF8552",alpha = .3, linewidth = 1.2) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "FH - Residual Assumption",
# subtitle = "Fay–Herriot model",
x = "standardized Residuals",
y = "Density",
# caption = "Source: household survey + census"
)
p3 <- p_dig_FH$density_ran
p4 <- p_dig_FH$density_ran +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#FF8552",fill = "#FF8552",alpha = .3, linewidth = 1.2) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "FH - Randome Effects Assumption",
# subtitle = "Fay–Herriot model",
x = "standardized randome effects",
y = "Density",
# caption = "Source: household survey + census"
)
plot_grid(p1,p2,p3,p4,nrow = 2,ncol = 2)p5 <- p_dig_FH_comp$scatter_FH
p6 <- p_dig_FH_comp$scatter_FH +
geom_smooth(aes(color = "Reg. line"), method = "lm", se = FALSE) +
scale_color_manual(values = c("Reg. line" = "#9A031E", "Intersection" = "#FF8552")) +
theme_minimal() +
labs(
title = "direct vs fh estimates",
x = "direct",
y = "fh-model",
color = ""
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p7 <- p_dig_FH_comp$line_FH
p8 <- p_dig_FH_comp$line_FH +
# Override layer colors to be mapped instead of fixed
# geom_line(aes(color = c("Direct", "Model-based"))) +
# geom_line(size = .8,alpha = .3) +
# geom_point(size = 1, alpha = .5) +
scale_color_manual(values = c("#9A031E","#FF8552")) +
theme_minimal() +
labs(
title = "Direct vs FH estimates ordered by Domain size",
x = "Domain - ordered by decreasing MSE of Direct",
y = "mean years of schooling estimate",
color = "Method"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p9 <- p_dig_FH_comp$ordered_MSE_FH
p10 <- p_dig_FH_comp$ordered_MSE_FH +
scale_color_manual(values = c("#9A031E","#FF8552")) +
geom_point(size = 2) +
theme_minimal() +
labs(
title = "MSE: direct vs FH",
# subtitle = "Fay–Herriot model",
x = "Domain - ordered by increasing MSE of Direct",
y = "MSE",
# caption = "Source: household survey + census"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggsave("../output/diagnostics_fh.svg",
plot = plot_grid_fh,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
device = "svg")
ggsave("../output/diagnostics_fh.png",
plot = plot_grid_fh,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
dpi = 300)p1 <- p_dig_BHF$density_res
p2 <- p_dig_BHF$density_res +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#7ca982",fill = "#7ca982",alpha = .3, linewidth = 1.2) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "BHF - Residual Assumption",
# subtitle = "Fay–Herriot model",
x = "standardized Residuals",
y = "Density",
# caption = "Source: household survey + census"
)
p3 <- p_dig_BHF$density_ran
p4 <- p_dig_BHF$density_ran +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#7ca982",fill = "#7ca982",alpha = .3, linewidth = 1.2) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "BHF - Randome Effects Adsumption",
# subtitle = "Fay–Herriot model",
x = "standardized randome effects",
y = "Desnity",
# caption = "Source: household survey + census"
)
plot_grid(p1,p2,p3,p4,nrow = 2,ncol = 2)p5 <- p_dig_BHF_comp$scatter_Mean
p6 <- p_dig_BHF_comp$scatter_Mean +
geom_smooth(aes(color = "Reg. line"), method = "lm", se = FALSE) +
scale_color_manual(values = c("Reg. line" = "#4e6b57", "Intersection" = "#b5c5b4")) +
theme_minimal() +
labs(
title = "direct vs BHF estimates",
x = "direct",
y = "BHF-Mode",
color = ""
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p7 <- p_dig_BHF_comp$line_Mean
p8 <- p_dig_BHF_comp$line_Mean +
# Override layer colors to be mapped instead of fixed
# geom_line(aes(color = c("Direct", "Model-based"))) +
# geom_line(aes(size = .8,alpha = .3)) +
# geom_point(aes(size = 1, alpha = .2)) +
scale_color_manual(values = c("#4e6b57","#b5c5b4")) +
theme_minimal() +
labs(
title = "Direct vs BHF estimates ordered by Domain size",
x = "Domain - ordered by decreasing MSE of direct estimation",
y = "mean years of schooling estimate",
color = "Method"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p9 <- p_dig_BHF_comp$ordered_MSE_Mean
p10 <- p_dig_BHF_comp$ordered_MSE_Mean +
scale_color_manual(values = c("#4e6b57","#b5c5b4")) +
geom_point(size = 2) +
theme_minimal() +
labs(
title = "MSE: direct vs BHF",
# subtitle = "Fay–Herriot model",
x = "Domain - ordered by increasing MSE of direct estimation",
y = "MSE",
# caption = "Source: household survey + census"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggsave("../output/diagnostics_bhf.svg",
plot = plot_grid_bhf,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
device = "svg")
ggsave("../output/disgnostics_bhf.png",
plot = plot_grid_bhf,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
dpi = 300)